#!/usr/bin/python
#encoding=UTF-8

import urllib,re

# Default search term ("Xi'an"); passed to searchWord() when the file is run as a script.
keyword = "西安" 

def getSrc(url):
    """Download a page and return the text of its source ('来源') field.

    The page is searched for the first span that starts with '来源' and runs
    up to the last '&nbsp' on the same line. The '来源：' label and any
    '&nbsp;&nbsp;&nbsp' padding are then removed from that span.

    Args:
        url: Address of the page to download, as accepted by urllib.urlopen.

    Returns:
        The extracted source string, or '' when the page contains no
        matching field.
    """
    wp = urllib.urlopen(url)
    try:
        result = wp.read()
    finally:
        # Always release the connection, even if the read fails.
        wp.close()

    # NOTE: the pattern is a UTF-8 byte string (per this file's encoding
    # line), so the page is presumably served as UTF-8 too - confirm.
    # The match is deliberately greedy: it has to swallow the run of
    # '&nbsp;' entities so the three-entity padding can be stripped below.
    match = re.search('来源.*&nbsp', result)
    if match is None:
        # No source field on this page; report "nothing found" instead of
        # failing with an AttributeError on None.
        return ''
    return match.group(0).replace('&nbsp;&nbsp;&nbsp', '').replace('来源：', '')
    
def searchWord(s, max_results=10):
    """Search xinhuapo.com for a keyword and tabulate the hits.

    Downloads the search result page, then for each '<h5>...</h5>' line
    extracts the article URL and title, pairs it with the date found in the
    '发布时间' line at the same position, and calls getSrc() on the article
    URL to obtain its source.

    Args:
        s: Search keyword (byte string). It is percent-encoded before being
            placed in the query string.
        max_results: Number of rows in the returned table (default 10).
            Hits beyond this count are ignored.

    Returns:
        A list of max_results rows, each [title, publish date, source, url].
        Rows with no corresponding hit, and individual fields that could
        not be extracted, keep the placeholder ' '.
    """
    max_results = max(max_results, 0)

    # Escape the keyword so characters such as '&', '#', '+' or '%' cannot
    # alter the structure of the query string.
    search_url = ('http://xinhuapo.com/index.php?m=search&c=index&a=init&typeid=1&siteid=1&q='
                  + urllib.quote(s) + '&Submit=搜索')
    wp = urllib.urlopen(search_url)
    try:
        page = wp.read()
    finally:
        # Always release the connection, even if the read fails.
        wp.close()

    publish_lines = re.findall('发布时间.*</div>', page)
    headings = re.findall('<h5>.*</h5>', page)

    res = [[' ' for col in range(4)] for row in range(max_results)]
    # Cap the hits at the table size so an unusually long result page cannot
    # index past the last row.
    for i, heading in enumerate(headings[:max_results]):
        title_match = re.search('html">.*</a', heading)
        if title_match is not None:
            # Drop the surrounding anchor markup and the red highlight the
            # page puts around the matched keyword.
            res[i][0] = (title_match.group(0)
                         .replace('html">', '')
                         .replace('<font color=red>', '')
                         .replace('</font>', '')
                         .replace('</a', ''))

        # The date lines are matched to headings by position; there may be
        # fewer of them than headings.
        if i < len(publish_lines):
            date_match = re.search(r'\d{4}-\d{2}-\d{2}', publish_lines[i])
            if date_match is not None:
                res[i][1] = date_match.group(0)

        link_match = re.search('http.*html', heading)
        if link_match is not None:
            article_url = link_match.group(0)
            res[i][2] = getSrc(article_url)
            res[i][3] = article_url
    return res
        

            
if __name__ == "__main__":
    # Script entry point: run the default search and print every cell of the
    # 10 x 4 result table, one value per line (title, date, source, url).
    table = searchWord(keyword)
    for row_index in range(10):
        record = table[row_index]
        for col_index in range(4):
            print(record[col_index])
